# Replication Archive for
# Coppock, Alexander. Avoiding Post-Treatment Bias in Audit Experiments
# Journal of Experimental Political Science, Forthcoming

# clear workspace
rm(list = ls())

# Load Packages -----------------------------------------------------------

# Uncomment to install
# install.packages("dplyr")
# install.packages("estimatr")
# install.packages("xtable")
# install.packages("broom")

library(dplyr)
library(estimatr)
library(xtable)
library(broom)

# Load Data ---------------------------------------------------------------

# These data obtained from http://dx.doi.org/10.7910/DVN/28158
# Ensure you download as .Rdata if re-downloading

load("voterIDexp_data_sept2014.Rdata")

# rename dataset so no confusion
wnf <- data
rm(data)

# Cleaning ----------------------------------------------------------------

# Subset to VR emails in week one, remove two states
wnf_subset <-
  wnf %>%
  filter(
    !code %in% c(41, 12),
    week == 1,
    first_text == 1
  )

# Recode so no NAs
wnf_subset <-
  within(wnf_subset, {
    bounced <- as.numeric(is.na(response))
    response_no_na <- response
    response_no_na[is.na(response)] <- 0
    friendly_no_na <- friendly
    friendly_no_na[is.na(friendly)] <- 0
    pair_id <- paste0(fullpair, "_", code.fac)
  })

# Finding matched pairs that in which both members respond
pair_level <-
  wnf_subset %>%
  group_by(pair_id) %>%
  summarize(num_response = sum(response, na.rm = TRUE))

wnf_subset <-
  left_join(wnf_subset, pair_level) %>%
  mutate(AR = (num_response == 2)) %>%
  filter(bounced == 0)

# Create dataset of always-responders
wnf_always_responders <-
  filter(wnf_subset, AR) %>%
  filter(response_no_na == 1)

# Two observations that respond don't have a friendliness code, assuming "unfriendly"
wnf_always_responders <-
  within(wnf_always_responders,
         friendly[is.na(friendly)] <- 0)

# analysis ----------------------------------------------------------------

# Naive difference-in-means
fit_1 <- tidy(lm_robust(friendly ~ first_name_latino, data = wnf_subset))

# Redefined outcome Difference-in-means
fit_2 <- tidy(lm_robust(friendly_no_na ~ first_name_latino, data = wnf_subset))

# Difference-in-means among matched pairs in which both members respond
fit_3 <- tidy(lm_robust(friendly ~ first_name_latino, data = wnf_always_responders))

# This estimator adapted from Aronow et al. 2017 source code
bounds_estimator <- function(Y, Z, Fail, Weight) {
  f0 <- sum(Weight[Fail == 1 & Z == 0]) / sum(Weight[Z == 0])
  f1 <- sum(Weight[Fail == 1 & Z == 1]) / sum(Weight[Z == 1])

  trim0 <- (f1) / (1 - f0)
  trim1 <- (f0) / (1 - f1)

  dataf <- data.frame(Y, Z, Fail, Weight)
  datafsort <- dataf[order(Y), ]

  OutS0 <- datafsort[datafsort$Fail == 0 & datafsort$Z == 0, ]
  OutS1 <- datafsort[datafsort$Fail == 0 & datafsort$Z == 1, ]

  OutS0$Weight <- OutS0$Weight / sum(OutS0$Weight)
  OutS1$Weight <- OutS1$Weight / sum(OutS1$Weight)

  OutS0.CDF <- OutS0$Weight
  OutS1.CDF <- OutS1$Weight

  for (i in 2:length(OutS0.CDF)) OutS0.CDF[i] <- OutS0.CDF[i] + OutS0.CDF[i - 1]
  for (i in 2:length(OutS1.CDF)) OutS1.CDF[i] <- OutS1.CDF[i] + OutS1.CDF[i - 1]

  Out0U <- weighted.mean(OutS0$Y[OutS0.CDF > trim0], OutS0$Weight[OutS0.CDF > trim0])
  Out0L <- weighted.mean(OutS0$Y[OutS0.CDF < (1 - trim0)], OutS0$Weight[OutS0.CDF < (1 - trim0)])

  Out1U <- weighted.mean(OutS1$Y[OutS1.CDF > trim1], OutS1$Weight[OutS1.CDF > trim1])
  Out1L <- weighted.mean(OutS1$Y[OutS1.CDF < (1 - trim1)], OutS1$Weight[OutS1.CDF < (1 - trim1)])

  low_est <- Out1L - Out0U
  high_est <- Out1U - Out0L

  return(data.frame(
    low_est = low_est,
    high_est = high_est
  ))
}

# helper function
bounds_wrapper <- function(dat) {
  Y <- dat$friendly_no_na
  Fail <- 1 - dat$response_no_na
  Z <- dat$first_name_latino
  Weight <- rep(1, length(Y))

  bounds_estimator(
    Y = Y,
    Z = Z,
    Fail = Fail,
    Weight = Weight
  )
}

# Bounds point estimate
bounds_est <- 
  bounds_wrapper(wnf_subset) %>% 
  mutate(est = paste0("[", round(low_est, 3), ", ", round(high_est, 3), "]"))

# Bootstrapping for uncertainty
set.seed(343)
boot_df <-
  bootstrap(wnf_subset, 1000) %>%
  do(bounds_wrapper(.)) %>%
  ungroup()

bounds_boot <-
  boot_df %>%
  summarize(
    ci_lower = quantile(low_est, 0.025),
    ci_upper = quantile(high_est, 0.975)
  )

bounds_df <- bind_rows(bind_cols(bounds_est, bounds_boot))

# make table --------------------------------------------------------------

Estimator <- c(
  "Naive difference-in-means",
  "Difference-in-means",
  "Difference-in-means among matched pairs in which both members respond",
  "Bounds"
)

Estimand <- c(
  "Undefined",
  "ATE on redefined outcome",
  "ATE among matched pairs in which both members are Always-Responders",
  "ATE among all Always-Responders"
)

estimates <- c(
  round(fit_1$coefficients[2], 3),
  round(fit_2$coefficients[2], 3),
  round(fit_3$coefficients[2], 3),
  bounds_df$est
)

cis <- c(
  paste0("[", round(fit_1$ci_lower[2], 3), ", ", round(fit_1$ci_upper[2], 3), "]"),
  paste0("[", round(fit_2$ci_lower[2], 3), ", ", round(fit_2$ci_upper[2], 3), "]"),
  paste0("[", round(fit_3$ci_lower[2], 3), ", ", round(fit_3$ci_upper[2], 3), "]"),
  paste0("[", round(bounds_df$ci_lower, 3), ", ", round(bounds_df$ci_upper, 3), "]")
)

mat <- cbind(Estimand, Estimator, estimates, cis)
mat

# Uncomment to produce table
# print.xtable(
#   xtable(mat),
#   only.contents = TRUE,
#   include.rownames = FALSE,
#   include.colnames = FALSE,
#   hline.after = c(),
#   file = "results.tex"
# )
